{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "import os.path\n",
    "import geopandas\n",
    "from collections import defaultdict\n",
    "from matplotlib import pyplot as plt\n",
    "import pandas as pd\n",
    "\n",
    "import geo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get probas and info about roofs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_batch(i_batch, probas_dir='/server/var/data/OpenSolarMap/probas/chunks/'):\n",
    "    \"\"\"Load one pickled chunk of per-building information.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    i_batch : int\n",
    "        Chunk number; the file read is named ``probas_`` followed by the\n",
    "        number zero-padded to four digits.\n",
    "    probas_dir : str, optional\n",
    "        Directory holding the chunk files. Defaults to the server path this\n",
    "        notebook was written against.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    object or None\n",
    "        Whatever object was pickled in the chunk file, or ``None`` when the\n",
    "        file does not exist.\n",
    "    \"\"\"\n",
    "    probas_filename = os.path.join(probas_dir, 'probas_{:04d}'.format(i_batch))\n",
    "\n",
    "    if not os.path.isfile(probas_filename):\n",
    "        return None\n",
    "\n",
    "    # NOTE(security): unpickling can execute arbitrary code; only point this\n",
    "    # at chunk files you produced yourself.\n",
    "    # The context manager guarantees the handle is closed after loading.\n",
    "    with open(probas_filename, 'rb') as probas_file:\n",
    "        return pickle.load(probas_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Merge every chunk that could be loaded into a single dict.\n",
    "# get_batch() returns None for a missing file; empty chunks are skipped too.\n",
    "building_info_all = {}\n",
    "\n",
    "for i_batch in range(1410):  # chunk numbers 0..1409 are tried\n",
    "    building_info = get_batch(i_batch)\n",
    "    if not building_info:\n",
    "        continue\n",
    "    building_info_all.update(building_info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Materialise the identifiers as a real list: dict.keys() is a live view in\n",
    "# Python 3, so it would silently change if building_info_all were modified.\n",
    "ident_list = list(building_info_all)\n",
    "\n",
    "# Look every record up once and reuse it for all columns, so the columns are\n",
    "# guaranteed to share the order of ident_list.\n",
    "info_list = [building_info_all[ident] for ident in ident_list]\n",
    "\n",
    "insee_list = [info['INSEE'] for info in info_list]\n",
    "x_center_list = [info['x_center'] for info in info_list]\n",
    "y_center_list = [info['y_center'] for info in info_list]\n",
    "\n",
    "# 'probas' is indexed 0..3 here, one entry per class column.\n",
    "class_0_list = [info['probas'][0] for info in info_list]\n",
    "class_1_list = [info['probas'][1] for info in info_list]\n",
    "class_2_list = [info['probas'][2] for info in info_list]\n",
    "class_3_list = [info['probas'][3] for info in info_list]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One row per identifier; the dict keys become the column names.\n",
    "columns_by_name = {\n",
    "    'insee': insee_list,\n",
    "    'x_center': x_center_list,\n",
    "    'y_center': y_center_list,\n",
    "    'class_0': class_0_list,\n",
    "    'class_1': class_1_list,\n",
    "    'class_2': class_2_list,\n",
    "    'class_3': class_3_list,\n",
    "}\n",
    "\n",
    "df = pd.DataFrame(data=columns_by_name, index=ident_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Persist the per-building table; the identifiers are written as the index column.\n",
    "# NOTE: 'insee' holds strings with leading zeros (e.g. '01024'). A reader that\n",
    "# infers numeric types may drop them, so consider reading it back as str.\n",
    "df.to_csv('../building_probas.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>class_0</th>\n",
       "      <th>class_1</th>\n",
       "      <th>class_2</th>\n",
       "      <th>class_3</th>\n",
       "      <th>insee</th>\n",
       "      <th>x_center</th>\n",
       "      <th>y_center</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>242163151</th>\n",
       "      <td>0.037239</td>\n",
       "      <td>0.459789</td>\n",
       "      <td>0.062222</td>\n",
       "      <td>0.440750</td>\n",
       "      <td>60686</td>\n",
       "      <td>656312.511589</td>\n",
       "      <td>6901681.584753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186069734</th>\n",
       "      <td>0.020745</td>\n",
       "      <td>0.861572</td>\n",
       "      <td>0.043419</td>\n",
       "      <td>0.074263</td>\n",
       "      <td>36044</td>\n",
       "      <td>602548.270058</td>\n",
       "      <td>6633932.520310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165323140</th>\n",
       "      <td>0.950507</td>\n",
       "      <td>0.006132</td>\n",
       "      <td>0.006008</td>\n",
       "      <td>0.037353</td>\n",
       "      <td>57628</td>\n",
       "      <td>994150.729495</td>\n",
       "      <td>6885855.816963</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97758028</th>\n",
       "      <td>0.096858</td>\n",
       "      <td>0.848166</td>\n",
       "      <td>0.011252</td>\n",
       "      <td>0.043724</td>\n",
       "      <td>68117</td>\n",
       "      <td>1011015.342309</td>\n",
       "      <td>6780043.150458</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145094031</th>\n",
       "      <td>0.056170</td>\n",
       "      <td>0.087757</td>\n",
       "      <td>0.018528</td>\n",
       "      <td>0.837544</td>\n",
       "      <td>40308</td>\n",
       "      <td>383236.306267</td>\n",
       "      <td>6292879.900353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231081127</th>\n",
       "      <td>0.041548</td>\n",
       "      <td>0.723313</td>\n",
       "      <td>0.018310</td>\n",
       "      <td>0.216829</td>\n",
       "      <td>42307</td>\n",
       "      <td>830149.781108</td>\n",
       "      <td>6495661.844682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47111124</th>\n",
       "      <td>0.015291</td>\n",
       "      <td>0.050433</td>\n",
       "      <td>0.848436</td>\n",
       "      <td>0.085840</td>\n",
       "      <td>69123</td>\n",
       "      <td>843470.115736</td>\n",
       "      <td>6517053.026119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154808871</th>\n",
       "      <td>0.280481</td>\n",
       "      <td>0.522076</td>\n",
       "      <td>0.059618</td>\n",
       "      <td>0.137825</td>\n",
       "      <td>38451</td>\n",
       "      <td>872171.570013</td>\n",
       "      <td>6520286.658009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>175217148</th>\n",
       "      <td>0.096197</td>\n",
       "      <td>0.094227</td>\n",
       "      <td>0.035955</td>\n",
       "      <td>0.773621</td>\n",
       "      <td>94068</td>\n",
       "      <td>662440.041965</td>\n",
       "      <td>6855630.150467</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140943421</th>\n",
       "      <td>0.030778</td>\n",
       "      <td>0.227334</td>\n",
       "      <td>0.276052</td>\n",
       "      <td>0.465836</td>\n",
       "      <td>93073</td>\n",
       "      <td>668684.436288</td>\n",
       "      <td>6873197.492450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63784545</th>\n",
       "      <td>0.928156</td>\n",
       "      <td>0.018890</td>\n",
       "      <td>0.007459</td>\n",
       "      <td>0.045495</td>\n",
       "      <td>66189</td>\n",
       "      <td>695506.089198</td>\n",
       "      <td>6173004.243165</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>271422091</th>\n",
       "      <td>0.185713</td>\n",
       "      <td>0.044857</td>\n",
       "      <td>0.603249</td>\n",
       "      <td>0.166182</td>\n",
       "      <td>60644</td>\n",
       "      <td>614143.017987</td>\n",
       "      <td>6910837.981824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67766537</th>\n",
       "      <td>0.936479</td>\n",
       "      <td>0.014371</td>\n",
       "      <td>0.006502</td>\n",
       "      <td>0.042648</td>\n",
       "      <td>53130</td>\n",
       "      <td>419388.824297</td>\n",
       "      <td>6781226.352147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>345775005</th>\n",
       "      <td>0.913145</td>\n",
       "      <td>0.005786</td>\n",
       "      <td>0.005526</td>\n",
       "      <td>0.075543</td>\n",
       "      <td>50318</td>\n",
       "      <td>401710.439174</td>\n",
       "      <td>6848898.785940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110552101</th>\n",
       "      <td>0.864172</td>\n",
       "      <td>0.009078</td>\n",
       "      <td>0.015687</td>\n",
       "      <td>0.111062</td>\n",
       "      <td>49244</td>\n",
       "      <td>409251.370357</td>\n",
       "      <td>6702204.140920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>233180760</th>\n",
       "      <td>0.047361</td>\n",
       "      <td>0.157821</td>\n",
       "      <td>0.290705</td>\n",
       "      <td>0.504113</td>\n",
       "      <td>91174</td>\n",
       "      <td>660809.267102</td>\n",
       "      <td>6832309.397114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246767116</th>\n",
       "      <td>0.872646</td>\n",
       "      <td>0.015926</td>\n",
       "      <td>0.030100</td>\n",
       "      <td>0.081328</td>\n",
       "      <td>62673</td>\n",
       "      <td>698929.012825</td>\n",
       "      <td>7008991.545363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229007939</th>\n",
       "      <td>0.053954</td>\n",
       "      <td>0.094370</td>\n",
       "      <td>0.149032</td>\n",
       "      <td>0.702644</td>\n",
       "      <td>88530</td>\n",
       "      <td>951239.886651</td>\n",
       "      <td>6779655.608655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>431924068</th>\n",
       "      <td>0.124305</td>\n",
       "      <td>0.767582</td>\n",
       "      <td>0.009474</td>\n",
       "      <td>0.098639</td>\n",
       "      <td>33387</td>\n",
       "      <td>441982.016094</td>\n",
       "      <td>6442387.684082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366311585</th>\n",
       "      <td>0.908187</td>\n",
       "      <td>0.012426</td>\n",
       "      <td>0.012766</td>\n",
       "      <td>0.066621</td>\n",
       "      <td>82104</td>\n",
       "      <td>524965.905779</td>\n",
       "      <td>6319029.732557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126921177</th>\n",
       "      <td>0.013296</td>\n",
       "      <td>0.392674</td>\n",
       "      <td>0.311779</td>\n",
       "      <td>0.282250</td>\n",
       "      <td>49007</td>\n",
       "      <td>431948.316447</td>\n",
       "      <td>6714921.020184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395110727</th>\n",
       "      <td>0.944874</td>\n",
       "      <td>0.006607</td>\n",
       "      <td>0.007476</td>\n",
       "      <td>0.041043</td>\n",
       "      <td>24372</td>\n",
       "      <td>507153.288756</td>\n",
       "      <td>6452362.354186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200147981</th>\n",
       "      <td>0.275767</td>\n",
       "      <td>0.022201</td>\n",
       "      <td>0.051766</td>\n",
       "      <td>0.650267</td>\n",
       "      <td>49037</td>\n",
       "      <td>401797.082905</td>\n",
       "      <td>6737548.229313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295070897</th>\n",
       "      <td>0.630428</td>\n",
       "      <td>0.007269</td>\n",
       "      <td>0.006185</td>\n",
       "      <td>0.356118</td>\n",
       "      <td>56246</td>\n",
       "      <td>255418.653777</td>\n",
       "      <td>6787736.738123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63265622</th>\n",
       "      <td>0.026119</td>\n",
       "      <td>0.013911</td>\n",
       "      <td>0.929396</td>\n",
       "      <td>0.030574</td>\n",
       "      <td>42218</td>\n",
       "      <td>808045.434828</td>\n",
       "      <td>6481279.793037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>367381522</th>\n",
       "      <td>0.253425</td>\n",
       "      <td>0.034400</td>\n",
       "      <td>0.007932</td>\n",
       "      <td>0.704243</td>\n",
       "      <td>03225</td>\n",
       "      <td>656319.354114</td>\n",
       "      <td>6599954.197294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111109295</th>\n",
       "      <td>0.016670</td>\n",
       "      <td>0.079053</td>\n",
       "      <td>0.076860</td>\n",
       "      <td>0.827417</td>\n",
       "      <td>35196</td>\n",
       "      <td>339468.625594</td>\n",
       "      <td>6786208.373869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>435509314</th>\n",
       "      <td>0.019405</td>\n",
       "      <td>0.664410</td>\n",
       "      <td>0.031227</td>\n",
       "      <td>0.284958</td>\n",
       "      <td>03315</td>\n",
       "      <td>689339.809111</td>\n",
       "      <td>6589092.444311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127937481</th>\n",
       "      <td>0.036705</td>\n",
       "      <td>0.083626</td>\n",
       "      <td>0.086336</td>\n",
       "      <td>0.793333</td>\n",
       "      <td>35267</td>\n",
       "      <td>380013.260404</td>\n",
       "      <td>6818879.211822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199374142</th>\n",
       "      <td>0.405980</td>\n",
       "      <td>0.029737</td>\n",
       "      <td>0.106761</td>\n",
       "      <td>0.457522</td>\n",
       "      <td>59179</td>\n",
       "      <td>728527.067551</td>\n",
       "      <td>7022367.889100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>299502067</th>\n",
       "      <td>0.169139</td>\n",
       "      <td>0.055260</td>\n",
       "      <td>0.165198</td>\n",
       "      <td>0.610402</td>\n",
       "      <td>56166</td>\n",
       "      <td>224043.504223</td>\n",
       "      <td>6774800.433776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144262732</th>\n",
       "      <td>0.066437</td>\n",
       "      <td>0.799183</td>\n",
       "      <td>0.013080</td>\n",
       "      <td>0.121300</td>\n",
       "      <td>71106</td>\n",
       "      <td>798186.574496</td>\n",
       "      <td>6592845.750334</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82194672</th>\n",
       "      <td>0.747655</td>\n",
       "      <td>0.012394</td>\n",
       "      <td>0.011316</td>\n",
       "      <td>0.228636</td>\n",
       "      <td>29293</td>\n",
       "      <td>186550.385108</td>\n",
       "      <td>6773160.813065</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>260517505</th>\n",
       "      <td>0.017021</td>\n",
       "      <td>0.065069</td>\n",
       "      <td>0.872329</td>\n",
       "      <td>0.045580</td>\n",
       "      <td>35238</td>\n",
       "      <td>354267.165917</td>\n",
       "      <td>6790361.987882</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220382078</th>\n",
       "      <td>0.925495</td>\n",
       "      <td>0.006189</td>\n",
       "      <td>0.008037</td>\n",
       "      <td>0.060279</td>\n",
       "      <td>53074</td>\n",
       "      <td>428756.127919</td>\n",
       "      <td>6802165.644519</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>143368001</th>\n",
       "      <td>0.940455</td>\n",
       "      <td>0.005964</td>\n",
       "      <td>0.006808</td>\n",
       "      <td>0.046772</td>\n",
       "      <td>63064</td>\n",
       "      <td>660004.944677</td>\n",
       "      <td>6527918.237908</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117102460</th>\n",
       "      <td>0.435935</td>\n",
       "      <td>0.105727</td>\n",
       "      <td>0.300659</td>\n",
       "      <td>0.157679</td>\n",
       "      <td>68298</td>\n",
       "      <td>1011565.356314</td>\n",
       "      <td>6802553.679241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95275102</th>\n",
       "      <td>0.539665</td>\n",
       "      <td>0.022148</td>\n",
       "      <td>0.008646</td>\n",
       "      <td>0.429541</td>\n",
       "      <td>35306</td>\n",
       "      <td>337476.665173</td>\n",
       "      <td>6845611.209112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70141783</th>\n",
       "      <td>0.018513</td>\n",
       "      <td>0.639287</td>\n",
       "      <td>0.039066</td>\n",
       "      <td>0.303134</td>\n",
       "      <td>35033</td>\n",
       "      <td>346874.967915</td>\n",
       "      <td>6766543.712229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95906988</th>\n",
       "      <td>0.022420</td>\n",
       "      <td>0.776096</td>\n",
       "      <td>0.009661</td>\n",
       "      <td>0.191823</td>\n",
       "      <td>22050</td>\n",
       "      <td>327131.289496</td>\n",
       "      <td>6829470.160864</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80389181</th>\n",
       "      <td>0.014459</td>\n",
       "      <td>0.244643</td>\n",
       "      <td>0.613781</td>\n",
       "      <td>0.127117</td>\n",
       "      <td>69259</td>\n",
       "      <td>846792.974883</td>\n",
       "      <td>6512386.344608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114279827</th>\n",
       "      <td>0.033083</td>\n",
       "      <td>0.109857</td>\n",
       "      <td>0.177787</td>\n",
       "      <td>0.679273</td>\n",
       "      <td>77487</td>\n",
       "      <td>676310.669506</td>\n",
       "      <td>6824807.925200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109642497</th>\n",
       "      <td>0.922405</td>\n",
       "      <td>0.008417</td>\n",
       "      <td>0.005462</td>\n",
       "      <td>0.063716</td>\n",
       "      <td>85294</td>\n",
       "      <td>356648.248655</td>\n",
       "      <td>6592757.163442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133958762</th>\n",
       "      <td>0.942900</td>\n",
       "      <td>0.006628</td>\n",
       "      <td>0.005614</td>\n",
       "      <td>0.044859</td>\n",
       "      <td>57251</td>\n",
       "      <td>935123.861230</td>\n",
       "      <td>6881218.668516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>252485494</th>\n",
       "      <td>0.807766</td>\n",
       "      <td>0.024308</td>\n",
       "      <td>0.018961</td>\n",
       "      <td>0.148965</td>\n",
       "      <td>60012</td>\n",
       "      <td>639198.454747</td>\n",
       "      <td>6907679.870674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>161994322</th>\n",
       "      <td>0.075232</td>\n",
       "      <td>0.092774</td>\n",
       "      <td>0.068682</td>\n",
       "      <td>0.763312</td>\n",
       "      <td>33389</td>\n",
       "      <td>418351.478494</td>\n",
       "      <td>6472582.714646</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>302874956</th>\n",
       "      <td>0.954523</td>\n",
       "      <td>0.005407</td>\n",
       "      <td>0.004670</td>\n",
       "      <td>0.035400</td>\n",
       "      <td>51184</td>\n",
       "      <td>813102.677179</td>\n",
       "      <td>6845024.947827</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123884800</th>\n",
       "      <td>0.893507</td>\n",
       "      <td>0.007885</td>\n",
       "      <td>0.006594</td>\n",
       "      <td>0.092014</td>\n",
       "      <td>83011</td>\n",
       "      <td>986881.992840</td>\n",
       "      <td>6286618.399441</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>365761184</th>\n",
       "      <td>0.879988</td>\n",
       "      <td>0.009473</td>\n",
       "      <td>0.022244</td>\n",
       "      <td>0.088295</td>\n",
       "      <td>41235</td>\n",
       "      <td>539769.944215</td>\n",
       "      <td>6760394.357421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192712785</th>\n",
       "      <td>0.014445</td>\n",
       "      <td>0.167253</td>\n",
       "      <td>0.051761</td>\n",
       "      <td>0.766541</td>\n",
       "      <td>33097</td>\n",
       "      <td>384214.936051</td>\n",
       "      <td>6449066.778946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69031556</th>\n",
       "      <td>0.046651</td>\n",
       "      <td>0.050721</td>\n",
       "      <td>0.840869</td>\n",
       "      <td>0.061758</td>\n",
       "      <td>24227</td>\n",
       "      <td>554207.601398</td>\n",
       "      <td>6477560.658856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>320810253</th>\n",
       "      <td>0.015930</td>\n",
       "      <td>0.901928</td>\n",
       "      <td>0.012963</td>\n",
       "      <td>0.069179</td>\n",
       "      <td>53152</td>\n",
       "      <td>434827.065744</td>\n",
       "      <td>6766660.270411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273738507</th>\n",
       "      <td>0.946280</td>\n",
       "      <td>0.009313</td>\n",
       "      <td>0.005811</td>\n",
       "      <td>0.038597</td>\n",
       "      <td>01024</td>\n",
       "      <td>866229.119807</td>\n",
       "      <td>6577616.300120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235684976</th>\n",
       "      <td>0.022789</td>\n",
       "      <td>0.841376</td>\n",
       "      <td>0.048912</td>\n",
       "      <td>0.086922</td>\n",
       "      <td>49218</td>\n",
       "      <td>401757.696759</td>\n",
       "      <td>6695728.984433</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94593705</th>\n",
       "      <td>0.944391</td>\n",
       "      <td>0.007032</td>\n",
       "      <td>0.006706</td>\n",
       "      <td>0.041871</td>\n",
       "      <td>60007</td>\n",
       "      <td>654223.968850</td>\n",
       "      <td>6921707.843508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>216885717</th>\n",
       "      <td>0.070686</td>\n",
       "      <td>0.080408</td>\n",
       "      <td>0.031234</td>\n",
       "      <td>0.817673</td>\n",
       "      <td>18205</td>\n",
       "      <td>653015.876365</td>\n",
       "      <td>6667925.197058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94556806</th>\n",
       "      <td>0.144674</td>\n",
       "      <td>0.034056</td>\n",
       "      <td>0.017464</td>\n",
       "      <td>0.803805</td>\n",
       "      <td>08491</td>\n",
       "      <td>834609.841726</td>\n",
       "      <td>6961690.604613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>401621994</th>\n",
       "      <td>0.389914</td>\n",
       "      <td>0.020960</td>\n",
       "      <td>0.016662</td>\n",
       "      <td>0.572465</td>\n",
       "      <td>33293</td>\n",
       "      <td>428514.230812</td>\n",
       "      <td>6425052.483493</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>345719917</th>\n",
       "      <td>0.932134</td>\n",
       "      <td>0.012961</td>\n",
       "      <td>0.009075</td>\n",
       "      <td>0.045831</td>\n",
       "      <td>87174</td>\n",
       "      <td>578732.666992</td>\n",
       "      <td>6518373.500790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>233619190</th>\n",
       "      <td>0.026469</td>\n",
       "      <td>0.823004</td>\n",
       "      <td>0.023906</td>\n",
       "      <td>0.126622</td>\n",
       "      <td>37107</td>\n",
       "      <td>537326.042459</td>\n",
       "      <td>6654919.573681</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1373000 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            class_0   class_1   class_2   class_3  insee        x_center  \\\n",
       "242163151  0.037239  0.459789  0.062222  0.440750  60686   656312.511589   \n",
       "186069734  0.020745  0.861572  0.043419  0.074263  36044   602548.270058   \n",
       "165323140  0.950507  0.006132  0.006008  0.037353  57628   994150.729495   \n",
       "97758028   0.096858  0.848166  0.011252  0.043724  68117  1011015.342309   \n",
       "145094031  0.056170  0.087757  0.018528  0.837544  40308   383236.306267   \n",
       "231081127  0.041548  0.723313  0.018310  0.216829  42307   830149.781108   \n",
       "47111124   0.015291  0.050433  0.848436  0.085840  69123   843470.115736   \n",
       "154808871  0.280481  0.522076  0.059618  0.137825  38451   872171.570013   \n",
       "175217148  0.096197  0.094227  0.035955  0.773621  94068   662440.041965   \n",
       "140943421  0.030778  0.227334  0.276052  0.465836  93073   668684.436288   \n",
       "63784545   0.928156  0.018890  0.007459  0.045495  66189   695506.089198   \n",
       "271422091  0.185713  0.044857  0.603249  0.166182  60644   614143.017987   \n",
       "67766537   0.936479  0.014371  0.006502  0.042648  53130   419388.824297   \n",
       "345775005  0.913145  0.005786  0.005526  0.075543  50318   401710.439174   \n",
       "110552101  0.864172  0.009078  0.015687  0.111062  49244   409251.370357   \n",
       "233180760  0.047361  0.157821  0.290705  0.504113  91174   660809.267102   \n",
       "246767116  0.872646  0.015926  0.030100  0.081328  62673   698929.012825   \n",
       "229007939  0.053954  0.094370  0.149032  0.702644  88530   951239.886651   \n",
       "431924068  0.124305  0.767582  0.009474  0.098639  33387   441982.016094   \n",
       "366311585  0.908187  0.012426  0.012766  0.066621  82104   524965.905779   \n",
       "126921177  0.013296  0.392674  0.311779  0.282250  49007   431948.316447   \n",
       "395110727  0.944874  0.006607  0.007476  0.041043  24372   507153.288756   \n",
       "200147981  0.275767  0.022201  0.051766  0.650267  49037   401797.082905   \n",
       "295070897  0.630428  0.007269  0.006185  0.356118  56246   255418.653777   \n",
       "63265622   0.026119  0.013911  0.929396  0.030574  42218   808045.434828   \n",
       "367381522  0.253425  0.034400  0.007932  0.704243  03225   656319.354114   \n",
       "111109295  0.016670  0.079053  0.076860  0.827417  35196   339468.625594   \n",
       "435509314  0.019405  0.664410  0.031227  0.284958  03315   689339.809111   \n",
       "127937481  0.036705  0.083626  0.086336  0.793333  35267   380013.260404   \n",
       "199374142  0.405980  0.029737  0.106761  0.457522  59179   728527.067551   \n",
       "...             ...       ...       ...       ...    ...             ...   \n",
       "299502067  0.169139  0.055260  0.165198  0.610402  56166   224043.504223   \n",
       "144262732  0.066437  0.799183  0.013080  0.121300  71106   798186.574496   \n",
       "82194672   0.747655  0.012394  0.011316  0.228636  29293   186550.385108   \n",
       "260517505  0.017021  0.065069  0.872329  0.045580  35238   354267.165917   \n",
       "220382078  0.925495  0.006189  0.008037  0.060279  53074   428756.127919   \n",
       "143368001  0.940455  0.005964  0.006808  0.046772  63064   660004.944677   \n",
       "117102460  0.435935  0.105727  0.300659  0.157679  68298  1011565.356314   \n",
       "95275102   0.539665  0.022148  0.008646  0.429541  35306   337476.665173   \n",
       "70141783   0.018513  0.639287  0.039066  0.303134  35033   346874.967915   \n",
       "95906988   0.022420  0.776096  0.009661  0.191823  22050   327131.289496   \n",
       "80389181   0.014459  0.244643  0.613781  0.127117  69259   846792.974883   \n",
       "114279827  0.033083  0.109857  0.177787  0.679273  77487   676310.669506   \n",
       "109642497  0.922405  0.008417  0.005462  0.063716  85294   356648.248655   \n",
       "133958762  0.942900  0.006628  0.005614  0.044859  57251   935123.861230   \n",
       "252485494  0.807766  0.024308  0.018961  0.148965  60012   639198.454747   \n",
       "161994322  0.075232  0.092774  0.068682  0.763312  33389   418351.478494   \n",
       "302874956  0.954523  0.005407  0.004670  0.035400  51184   813102.677179   \n",
       "123884800  0.893507  0.007885  0.006594  0.092014  83011   986881.992840   \n",
       "365761184  0.879988  0.009473  0.022244  0.088295  41235   539769.944215   \n",
       "192712785  0.014445  0.167253  0.051761  0.766541  33097   384214.936051   \n",
       "69031556   0.046651  0.050721  0.840869  0.061758  24227   554207.601398   \n",
       "320810253  0.015930  0.901928  0.012963  0.069179  53152   434827.065744   \n",
       "273738507  0.946280  0.009313  0.005811  0.038597  01024   866229.119807   \n",
       "235684976  0.022789  0.841376  0.048912  0.086922  49218   401757.696759   \n",
       "94593705   0.944391  0.007032  0.006706  0.041871  60007   654223.968850   \n",
       "216885717  0.070686  0.080408  0.031234  0.817673  18205   653015.876365   \n",
       "94556806   0.144674  0.034056  0.017464  0.803805  08491   834609.841726   \n",
       "401621994  0.389914  0.020960  0.016662  0.572465  33293   428514.230812   \n",
       "345719917  0.932134  0.012961  0.009075  0.045831  87174   578732.666992   \n",
       "233619190  0.026469  0.823004  0.023906  0.126622  37107   537326.042459   \n",
       "\n",
       "                 y_center  \n",
       "242163151  6901681.584753  \n",
       "186069734  6633932.520310  \n",
       "165323140  6885855.816963  \n",
       "97758028   6780043.150458  \n",
       "145094031  6292879.900353  \n",
       "231081127  6495661.844682  \n",
       "47111124   6517053.026119  \n",
       "154808871  6520286.658009  \n",
       "175217148  6855630.150467  \n",
       "140943421  6873197.492450  \n",
       "63784545   6173004.243165  \n",
       "271422091  6910837.981824  \n",
       "67766537   6781226.352147  \n",
       "345775005  6848898.785940  \n",
       "110552101  6702204.140920  \n",
       "233180760  6832309.397114  \n",
       "246767116  7008991.545363  \n",
       "229007939  6779655.608655  \n",
       "431924068  6442387.684082  \n",
       "366311585  6319029.732557  \n",
       "126921177  6714921.020184  \n",
       "395110727  6452362.354186  \n",
       "200147981  6737548.229313  \n",
       "295070897  6787736.738123  \n",
       "63265622   6481279.793037  \n",
       "367381522  6599954.197294  \n",
       "111109295  6786208.373869  \n",
       "435509314  6589092.444311  \n",
       "127937481  6818879.211822  \n",
       "199374142  7022367.889100  \n",
       "...                   ...  \n",
       "299502067  6774800.433776  \n",
       "144262732  6592845.750334  \n",
       "82194672   6773160.813065  \n",
       "260517505  6790361.987882  \n",
       "220382078  6802165.644519  \n",
       "143368001  6527918.237908  \n",
       "117102460  6802553.679241  \n",
       "95275102   6845611.209112  \n",
       "70141783   6766543.712229  \n",
       "95906988   6829470.160864  \n",
       "80389181   6512386.344608  \n",
       "114279827  6824807.925200  \n",
       "109642497  6592757.163442  \n",
       "133958762  6881218.668516  \n",
       "252485494  6907679.870674  \n",
       "161994322  6472582.714646  \n",
       "302874956  6845024.947827  \n",
       "123884800  6286618.399441  \n",
       "365761184  6760394.357421  \n",
       "192712785  6449066.778946  \n",
       "69031556   6477560.658856  \n",
       "320810253  6766660.270411  \n",
       "273738507  6577616.300120  \n",
       "235684976  6695728.984433  \n",
       "94593705   6921707.843508  \n",
       "216885717  6667925.197058  \n",
       "94556806   6961690.604613  \n",
       "401621994  6425052.483493  \n",
       "345719917  6518373.500790  \n",
       "233619190  6654919.573681  \n",
       "\n",
       "[1373000 rows x 7 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview of the assembled table (pandas truncates the display of long frames).\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1373000"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in df, i.e. identifiers collected across all loaded chunks.\n",
    "len(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get info about communes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the commune shapes and discard the row labelled 36181\n",
    "# (Landerneau, per the original note).\n",
    "# NOTE(review): 36181 is presumably the default integer row label assigned\n",
    "# when the file is read, so it is only meaningful for this exact shapefile.\n",
    "choropleth_map = (\n",
    "    geopandas.read_file('../communes-20150101-100m.shp')\n",
    "    .drop(36181)  # Landerneau\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep the centroid of every commune geometry as an extra 'centroid' column.\n",
    "choropleth_map['centroid'] = choropleth_map.geometry.centroid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Split each centroid point into plain x / y columns.\n",
    "centroid_points = choropleth_map.centroid\n",
    "\n",
    "choropleth_map['centroid_x'] = centroid_points.apply(lambda point: point.x)\n",
    "choropleth_map['centroid_y'] = centroid_points.apply(lambda point: point.y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Convert every centroid with geo.geo2carto once per commune (rather than\n",
    "# once per output column) and split the result afterwards.\n",
    "# geo2carto is given (p.y, p.x); items [0] and [1] of what it returns are kept.\n",
    "# NOTE(review): the _L93 suffix suggests Lambert-93 output - confirm in geo.py.\n",
    "centroids_l93 = [geo.geo2carto(p.y, p.x) for p in choropleth_map.centroid]\n",
    "\n",
    "choropleth_map['centroid_x_L93'] = [xy[0] for xy in centroids_l93]\n",
    "choropleth_map['centroid_y_L93'] = [xy[1] for xy in centroids_l93]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Dump the full commune table, including the geometry and centroid columns, to CSV.\n",
    "choropleth_map.to_csv('../communes_info.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Plain (non-geometry) columns copied from the commune table.\n",
    "commune_columns = [\n",
    "    'insee',\n",
    "    'nom',\n",
    "    'surf_m2',\n",
    "    'centroid_x',\n",
    "    'centroid_y',\n",
    "    'centroid_x_L93',\n",
    "    'centroid_y_L93',\n",
    "]\n",
    "\n",
    "communes_info = pd.DataFrame(\n",
    "    data={column: choropleth_map[column] for column in commune_columns}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Write the lightweight commune table (no geometry columns) to CSV.\n",
    "# NOTE: as in the building table, 'insee' may hold codes with leading zeros;\n",
    "# consider reading it back as str - TODO confirm against the shapefile.\n",
    "communes_info.to_csv('../communes_info_without_geo.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
